{
 "metadata": {
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.10-final"
  },
  "orig_nbformat": 2,
  "kernelspec": {
   "name": "python3",
   "display_name": "Python 3",
   "language": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2,
 "cells": [
  {
   "source": [
    "# 第三部分 特征篇（中）"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "source": [
    "## 基于Featuretools的自动特征衍生"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "source": [
    "### 1、导入数据集"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import featuretools as ft\n",
    "from sklearn.datasets import load_iris"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "     sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)\n",
       "0                  5.1               3.5                1.4               0.2\n",
       "1                  4.9               3.0                1.4               0.2\n",
       "2                  4.7               3.2                1.3               0.2\n",
       "3                  4.6               3.1                1.5               0.2\n",
       "4                  5.0               3.6                1.4               0.2\n",
       "..                 ...               ...                ...               ...\n",
       "145                6.7               3.0                5.2               2.3\n",
       "146                6.3               2.5                5.0               1.9\n",
       "147                6.5               3.0                5.2               2.0\n",
       "148                6.2               3.4                5.4               2.3\n",
       "149                5.9               3.0                5.1               1.8\n",
       "\n",
       "[150 rows x 4 columns]"
      ],
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>sepal length (cm)</th>\n      <th>sepal width (cm)</th>\n      <th>petal length (cm)</th>\n      <th>petal width (cm)</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>5.1</td>\n      <td>3.5</td>\n      <td>1.4</td>\n      <td>0.2</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>4.9</td>\n      <td>3.0</td>\n      <td>1.4</td>\n      <td>0.2</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>4.7</td>\n      <td>3.2</td>\n      <td>1.3</td>\n      <td>0.2</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>4.6</td>\n      <td>3.1</td>\n      <td>1.5</td>\n      <td>0.2</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>5.0</td>\n      <td>3.6</td>\n      <td>1.4</td>\n      <td>0.2</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>145</th>\n      <td>6.7</td>\n      <td>3.0</td>\n      <td>5.2</td>\n      <td>2.3</td>\n    </tr>\n    <tr>\n      <th>146</th>\n      <td>6.3</td>\n      <td>2.5</td>\n      <td>5.0</td>\n      <td>1.9</td>\n    </tr>\n    <tr>\n      <th>147</th>\n      <td>6.5</td>\n      <td>3.0</td>\n      <td>5.2</td>\n      <td>2.0</td>\n    </tr>\n    <tr>\n      <th>148</th>\n      <td>6.2</td>\n      <td>3.4</td>\n      <td>5.4</td>\n      <td>2.3</td>\n    </tr>\n    <tr>\n      <th>149</th>\n      <td>5.9</td>\n      <td>3.0</td>\n      <td>5.1</td>\n      <td>1.8</td>\n    </tr>\n  </tbody>\n</table>\n<p>150 rows × 4 columns</p>\n</div>"
     },
     "metadata": {},
     "execution_count": 3
    }
   ],
   "source": [
    "# Load the iris data as pandas objects: the features and the target.\n",
    "feature, target = load_iris(return_X_y=True, as_frame=True)\n",
    "feature"
   ]
  },
  {
   "source": [
    "### 2、构建实体和实体集"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "Entityset: Iris\n",
       "  Entities:\n",
       "    feature [Rows: 150, Columns: 5]\n",
       "  Relationships:\n",
       "    No relationships"
      ]
     },
     "metadata": {},
     "execution_count": 4
    }
   ],
   "source": [
    "# Create an empty entity set, then register the iris feature frame as an entity.\n",
    "es = ft.EntitySet(id='Iris')\n",
    "es = es.entity_from_dataframe(\n",
    "    entity_id='feature',\n",
    "    dataframe=feature,\n",
    "    index='index',\n",
    "    make_index=True,  # have featuretools create the 'index' column\n",
    ")\n",
    "es"
   ]
  },
  {
   "source": [
    "### 3、特征基元"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "                       name         type  dask_compatible  koalas_compatible  \\\n",
       "0           time_since_last  aggregation            False              False   \n",
       "1                       any  aggregation             True              False   \n",
       "2                     first  aggregation            False              False   \n",
       "3                  num_true  aggregation             True              False   \n",
       "4                      skew  aggregation            False              False   \n",
       "..                      ...          ...              ...                ...   \n",
       "74        divide_by_feature    transform             True               True   \n",
       "75    divide_numeric_scalar    transform             True               True   \n",
       "76             greater_than    transform             True              False   \n",
       "77  subtract_numeric_scalar    transform             True               True   \n",
       "78                  cum_min    transform            False              False   \n",
       "\n",
       "                                          description  \n",
       "0   Calculates the time elapsed since the last dat...  \n",
       "1        Determines if any value is 'True' in a list.  \n",
       "2               Determines the first value in a list.  \n",
       "3                 Counts the number of `True` values.  \n",
       "4   Computes the extent to which a distribution di...  \n",
       "..                                                ...  \n",
       "74         Divide a scalar by each value in the list.  \n",
       "75       Divide each element in the list by a scalar.  \n",
       "76  Determines if values in one list are greater t...  \n",
       "77   Subtract a scalar from each element in the list.  \n",
       "78                 Calculates the cumulative minimum.  \n",
       "\n",
       "[79 rows x 5 columns]"
      ],
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>name</th>\n      <th>type</th>\n      <th>dask_compatible</th>\n      <th>koalas_compatible</th>\n      <th>description</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>time_since_last</td>\n      <td>aggregation</td>\n      <td>False</td>\n      <td>False</td>\n      <td>Calculates the time elapsed since the last dat...</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>any</td>\n      <td>aggregation</td>\n      <td>True</td>\n      <td>False</td>\n      <td>Determines if any value is 'True' in a list.</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>first</td>\n      <td>aggregation</td>\n      <td>False</td>\n      <td>False</td>\n      <td>Determines the first value in a list.</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>num_true</td>\n      <td>aggregation</td>\n      <td>True</td>\n      <td>False</td>\n      <td>Counts the number of `True` values.</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>skew</td>\n      <td>aggregation</td>\n      <td>False</td>\n      <td>False</td>\n      <td>Computes the extent to which a distribution di...</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>74</th>\n      <td>divide_by_feature</td>\n      <td>transform</td>\n      <td>True</td>\n      <td>True</td>\n      <td>Divide a scalar by each value in the list.</td>\n    </tr>\n    <tr>\n      <th>75</th>\n      <td>divide_numeric_scalar</td>\n      <td>transform</td>\n      <td>True</td>\n      <td>True</td>\n   
   <td>Divide each element in the list by a scalar.</td>\n    </tr>\n    <tr>\n      <th>76</th>\n      <td>greater_than</td>\n      <td>transform</td>\n      <td>True</td>\n      <td>False</td>\n      <td>Determines if values in one list are greater t...</td>\n    </tr>\n    <tr>\n      <th>77</th>\n      <td>subtract_numeric_scalar</td>\n      <td>transform</td>\n      <td>True</td>\n      <td>True</td>\n      <td>Subtract a scalar from each element in the list.</td>\n    </tr>\n    <tr>\n      <th>78</th>\n      <td>cum_min</td>\n      <td>transform</td>\n      <td>False</td>\n      <td>False</td>\n      <td>Calculates the cumulative minimum.</td>\n    </tr>\n  </tbody>\n</table>\n<p>79 rows × 5 columns</p>\n</div>"
     },
     "metadata": {},
     "execution_count": 5
    }
   ],
   "source": [
    "# List the feature primitives available in featuretools.\n",
    "ft.list_primitives()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "trans_primitives = [\n",
    "    'add_numeric',\n",
    "    'subtract_numeric', \n",
    "    'multiply_numeric',\n",
    "    'divide_numeric'\n",
    "] # 选用加减乘除四个转换基元"
   ]
  },
  {
   "source": [
    "### 4、深度特征合成"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "Built 34 features\n",
      "Fewer chunks (38), than workers (48) consider reducing the chunk size\n",
      "EntitySet scattered to 48 workers in 8 seconds\n",
      "Elapsed: 00:01 | Progress: 100%|██████████\n"
     ]
    },
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "[<Feature: sepal length (cm)>,\n",
       " <Feature: sepal width (cm)>,\n",
       " <Feature: petal length (cm)>,\n",
       " <Feature: petal width (cm)>,\n",
       " <Feature: petal length (cm) + petal width (cm)>,\n",
       " <Feature: petal length (cm) + sepal length (cm)>,\n",
       " <Feature: petal length (cm) + sepal width (cm)>,\n",
       " <Feature: petal width (cm) + sepal length (cm)>,\n",
       " <Feature: petal width (cm) + sepal width (cm)>,\n",
       " <Feature: sepal length (cm) + sepal width (cm)>,\n",
       " <Feature: petal length (cm) / petal width (cm)>,\n",
       " <Feature: petal length (cm) / sepal length (cm)>,\n",
       " <Feature: petal length (cm) / sepal width (cm)>,\n",
       " <Feature: petal width (cm) / petal length (cm)>,\n",
       " <Feature: petal width (cm) / sepal length (cm)>,\n",
       " <Feature: petal width (cm) / sepal width (cm)>,\n",
       " <Feature: sepal length (cm) / petal length (cm)>,\n",
       " <Feature: sepal length (cm) / petal width (cm)>,\n",
       " <Feature: sepal length (cm) / sepal width (cm)>,\n",
       " <Feature: sepal width (cm) / petal length (cm)>,\n",
       " <Feature: sepal width (cm) / petal width (cm)>,\n",
       " <Feature: sepal width (cm) / sepal length (cm)>,\n",
       " <Feature: petal length (cm) * petal width (cm)>,\n",
       " <Feature: petal length (cm) * sepal length (cm)>,\n",
       " <Feature: petal length (cm) * sepal width (cm)>,\n",
       " <Feature: petal width (cm) * sepal length (cm)>,\n",
       " <Feature: petal width (cm) * sepal width (cm)>,\n",
       " <Feature: sepal length (cm) * sepal width (cm)>,\n",
       " <Feature: petal length (cm) - petal width (cm)>,\n",
       " <Feature: petal length (cm) - sepal length (cm)>,\n",
       " <Feature: petal length (cm) - sepal width (cm)>,\n",
       " <Feature: petal width (cm) - sepal length (cm)>,\n",
       " <Feature: petal width (cm) - sepal width (cm)>,\n",
       " <Feature: sepal length (cm) - sepal width (cm)>]"
      ]
     },
     "metadata": {},
     "execution_count": 15
    }
   ],
   "source": [
    "# Run deep feature synthesis on the 'feature' entity, one level deep,\n",
    "# using only the transform primitives selected above in trans_primitives.\n",
    "feature_matrix, feature_names = ft.dfs(\n",
    "    entityset=es,\n",
    "    target_entity='feature',\n",
    "    trans_primitives=trans_primitives,\n",
    "    max_depth=1,\n",
    "    # NOTE(review): in the recorded run, scattering to the parallel workers took\n",
    "    # longer than the computation itself; n_jobs=1 may be faster on data this small.\n",
    "    n_jobs=-1,\n",
    "    verbose=1,\n",
    ")\n",
    "feature_names"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "       sepal length (cm)  sepal width (cm)  petal length (cm)  \\\n",
       "index                                                           \n",
       "0                    5.1               3.5                1.4   \n",
       "1                    4.9               3.0                1.4   \n",
       "2                    4.7               3.2                1.3   \n",
       "3                    4.6               3.1                1.5   \n",
       "4                    5.0               3.6                1.4   \n",
       "...                  ...               ...                ...   \n",
       "145                  6.7               3.0                5.2   \n",
       "146                  6.3               2.5                5.0   \n",
       "147                  6.5               3.0                5.2   \n",
       "148                  6.2               3.4                5.4   \n",
       "149                  5.9               3.0                5.1   \n",
       "\n",
       "       petal width (cm)  petal length (cm) + petal width (cm)  \\\n",
       "index                                                           \n",
       "0                   0.2                                   1.6   \n",
       "1                   0.2                                   1.6   \n",
       "2                   0.2                                   1.5   \n",
       "3                   0.2                                   1.7   \n",
       "4                   0.2                                   1.6   \n",
       "...                 ...                                   ...   \n",
       "145                 2.3                                   7.5   \n",
       "146                 1.9                                   6.9   \n",
       "147                 2.0                                   7.2   \n",
       "148                 2.3                                   7.7   \n",
       "149                 1.8                                   6.9   \n",
       "\n",
       "       petal length (cm) + sepal length (cm)  \\\n",
       "index                                          \n",
       "0                                        6.5   \n",
       "1                                        6.3   \n",
       "2                                        6.0   \n",
       "3                                        6.1   \n",
       "4                                        6.4   \n",
       "...                                      ...   \n",
       "145                                     11.9   \n",
       "146                                     11.3   \n",
       "147                                     11.7   \n",
       "148                                     11.6   \n",
       "149                                     11.0   \n",
       "\n",
       "       petal length (cm) + sepal width (cm)  \\\n",
       "index                                         \n",
       "0                                       4.9   \n",
       "1                                       4.4   \n",
       "2                                       4.5   \n",
       "3                                       4.6   \n",
       "4                                       5.0   \n",
       "...                                     ...   \n",
       "145                                     8.2   \n",
       "146                                     7.5   \n",
       "147                                     8.2   \n",
       "148                                     8.8   \n",
       "149                                     8.1   \n",
       "\n",
       "       petal width (cm) + sepal length (cm)  \\\n",
       "index                                         \n",
       "0                                       5.3   \n",
       "1                                       5.1   \n",
       "2                                       4.9   \n",
       "3                                       4.8   \n",
       "4                                       5.2   \n",
       "...                                     ...   \n",
       "145                                     9.0   \n",
       "146                                     8.2   \n",
       "147                                     8.5   \n",
       "148                                     8.5   \n",
       "149                                     7.7   \n",
       "\n",
       "       petal width (cm) + sepal width (cm)  \\\n",
       "index                                        \n",
       "0                                      3.7   \n",
       "1                                      3.2   \n",
       "2                                      3.4   \n",
       "3                                      3.3   \n",
       "4                                      3.8   \n",
       "...                                    ...   \n",
       "145                                    5.3   \n",
       "146                                    4.4   \n",
       "147                                    5.0   \n",
       "148                                    5.7   \n",
       "149                                    4.8   \n",
       "\n",
       "       sepal length (cm) + sepal width (cm)  ...  \\\n",
       "index                                        ...   \n",
       "0                                       8.6  ...   \n",
       "1                                       7.9  ...   \n",
       "2                                       7.9  ...   \n",
       "3                                       7.7  ...   \n",
       "4                                       8.6  ...   \n",
       "...                                     ...  ...   \n",
       "145                                     9.7  ...   \n",
       "146                                     8.8  ...   \n",
       "147                                     9.5  ...   \n",
       "148                                     9.6  ...   \n",
       "149                                     8.9  ...   \n",
       "\n",
       "       petal length (cm) * sepal width (cm)  \\\n",
       "index                                         \n",
       "0                                      4.90   \n",
       "1                                      4.20   \n",
       "2                                      4.16   \n",
       "3                                      4.65   \n",
       "4                                      5.04   \n",
       "...                                     ...   \n",
       "145                                   15.60   \n",
       "146                                   12.50   \n",
       "147                                   15.60   \n",
       "148                                   18.36   \n",
       "149                                   15.30   \n",
       "\n",
       "       petal width (cm) * sepal length (cm)  \\\n",
       "index                                         \n",
       "0                                      1.02   \n",
       "1                                      0.98   \n",
       "2                                      0.94   \n",
       "3                                      0.92   \n",
       "4                                      1.00   \n",
       "...                                     ...   \n",
       "145                                   15.41   \n",
       "146                                   11.97   \n",
       "147                                   13.00   \n",
       "148                                   14.26   \n",
       "149                                   10.62   \n",
       "\n",
       "       petal width (cm) * sepal width (cm)  \\\n",
       "index                                        \n",
       "0                                     0.70   \n",
       "1                                     0.60   \n",
       "2                                     0.64   \n",
       "3                                     0.62   \n",
       "4                                     0.72   \n",
       "...                                    ...   \n",
       "145                                   6.90   \n",
       "146                                   4.75   \n",
       "147                                   6.00   \n",
       "148                                   7.82   \n",
       "149                                   5.40   \n",
       "\n",
       "       sepal length (cm) * sepal width (cm)  \\\n",
       "index                                         \n",
       "0                                     17.85   \n",
       "1                                     14.70   \n",
       "2                                     15.04   \n",
       "3                                     14.26   \n",
       "4                                     18.00   \n",
       "...                                     ...   \n",
       "145                                   20.10   \n",
       "146                                   15.75   \n",
       "147                                   19.50   \n",
       "148                                   21.08   \n",
       "149                                   17.70   \n",
       "\n",
       "       petal length (cm) - petal width (cm)  \\\n",
       "index                                         \n",
       "0                                       1.2   \n",
       "1                                       1.2   \n",
       "2                                       1.1   \n",
       "3                                       1.3   \n",
       "4                                       1.2   \n",
       "...                                     ...   \n",
       "145                                     2.9   \n",
       "146                                     3.1   \n",
       "147                                     3.2   \n",
       "148                                     3.1   \n",
       "149                                     3.3   \n",
       "\n",
       "       petal length (cm) - sepal length (cm)  \\\n",
       "index                                          \n",
       "0                                       -3.7   \n",
       "1                                       -3.5   \n",
       "2                                       -3.4   \n",
       "3                                       -3.1   \n",
       "4                                       -3.6   \n",
       "...                                      ...   \n",
       "145                                     -1.5   \n",
       "146                                     -1.3   \n",
       "147                                     -1.3   \n",
       "148                                     -0.8   \n",
       "149                                     -0.8   \n",
       "\n",
       "       petal length (cm) - sepal width (cm)  \\\n",
       "index                                         \n",
       "0                                      -2.1   \n",
       "1                                      -1.6   \n",
       "2                                      -1.9   \n",
       "3                                      -1.6   \n",
       "4                                      -2.2   \n",
       "...                                     ...   \n",
       "145                                     2.2   \n",
       "146                                     2.5   \n",
       "147                                     2.2   \n",
       "148                                     2.0   \n",
       "149                                     2.1   \n",
       "\n",
       "       petal width (cm) - sepal length (cm)  \\\n",
       "index                                         \n",
       "0                                      -4.9   \n",
       "1                                      -4.7   \n",
       "2                                      -4.5   \n",
       "3                                      -4.4   \n",
       "4                                      -4.8   \n",
       "...                                     ...   \n",
       "145                                    -4.4   \n",
       "146                                    -4.4   \n",
       "147                                    -4.5   \n",
       "148                                    -3.9   \n",
       "149                                    -4.1   \n",
       "\n",
       "       petal width (cm) - sepal width (cm)  \\\n",
       "index                                        \n",
       "0                                     -3.3   \n",
       "1                                     -2.8   \n",
       "2                                     -3.0   \n",
       "3                                     -2.9   \n",
       "4                                     -3.4   \n",
       "...                                    ...   \n",
       "145                                   -0.7   \n",
       "146                                   -0.6   \n",
       "147                                   -1.0   \n",
       "148                                   -1.1   \n",
       "149                                   -1.2   \n",
       "\n",
       "       sepal length (cm) - sepal width (cm)  \n",
       "index                                        \n",
       "0                                       1.6  \n",
       "1                                       1.9  \n",
       "2                                       1.5  \n",
       "3                                       1.5  \n",
       "4                                       1.4  \n",
       "...                                     ...  \n",
       "145                                     3.7  \n",
       "146                                     3.8  \n",
       "147                                     3.5  \n",
       "148                                     2.8  \n",
       "149                                     2.9  \n",
       "\n",
       "[150 rows x 34 columns]"
      ],
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>sepal length (cm)</th>\n      <th>sepal width (cm)</th>\n      <th>petal length (cm)</th>\n      <th>petal width (cm)</th>\n      <th>petal length (cm) + petal width (cm)</th>\n      <th>petal length (cm) + sepal length (cm)</th>\n      <th>petal length (cm) + sepal width (cm)</th>\n      <th>petal width (cm) + sepal length (cm)</th>\n      <th>petal width (cm) + sepal width (cm)</th>\n      <th>sepal length (cm) + sepal width (cm)</th>\n      <th>...</th>\n      <th>petal length (cm) * sepal width (cm)</th>\n      <th>petal width (cm) * sepal length (cm)</th>\n      <th>petal width (cm) * sepal width (cm)</th>\n      <th>sepal length (cm) * sepal width (cm)</th>\n      <th>petal length (cm) - petal width (cm)</th>\n      <th>petal length (cm) - sepal length (cm)</th>\n      <th>petal length (cm) - sepal width (cm)</th>\n      <th>petal width (cm) - sepal length (cm)</th>\n      <th>petal width (cm) - sepal width (cm)</th>\n      <th>sepal length (cm) - sepal width (cm)</th>\n    </tr>\n    <tr>\n      <th>index</th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>5.1</td>\n      <td>3.5</td>\n      <td>1.4</td>\n      <td>0.2</td>\n      <td>1.6</td>\n      <td>6.5</td>\n      <td>4.9</td>\n      
<td>5.3</td>\n      <td>3.7</td>\n      <td>8.6</td>\n      <td>...</td>\n      <td>4.90</td>\n      <td>1.02</td>\n      <td>0.70</td>\n      <td>17.85</td>\n      <td>1.2</td>\n      <td>-3.7</td>\n      <td>-2.1</td>\n      <td>-4.9</td>\n      <td>-3.3</td>\n      <td>1.6</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>4.9</td>\n      <td>3.0</td>\n      <td>1.4</td>\n      <td>0.2</td>\n      <td>1.6</td>\n      <td>6.3</td>\n      <td>4.4</td>\n      <td>5.1</td>\n      <td>3.2</td>\n      <td>7.9</td>\n      <td>...</td>\n      <td>4.20</td>\n      <td>0.98</td>\n      <td>0.60</td>\n      <td>14.70</td>\n      <td>1.2</td>\n      <td>-3.5</td>\n      <td>-1.6</td>\n      <td>-4.7</td>\n      <td>-2.8</td>\n      <td>1.9</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>4.7</td>\n      <td>3.2</td>\n      <td>1.3</td>\n      <td>0.2</td>\n      <td>1.5</td>\n      <td>6.0</td>\n      <td>4.5</td>\n      <td>4.9</td>\n      <td>3.4</td>\n      <td>7.9</td>\n      <td>...</td>\n      <td>4.16</td>\n      <td>0.94</td>\n      <td>0.64</td>\n      <td>15.04</td>\n      <td>1.1</td>\n      <td>-3.4</td>\n      <td>-1.9</td>\n      <td>-4.5</td>\n      <td>-3.0</td>\n      <td>1.5</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>4.6</td>\n      <td>3.1</td>\n      <td>1.5</td>\n      <td>0.2</td>\n      <td>1.7</td>\n      <td>6.1</td>\n      <td>4.6</td>\n      <td>4.8</td>\n      <td>3.3</td>\n      <td>7.7</td>\n      <td>...</td>\n      <td>4.65</td>\n      <td>0.92</td>\n      <td>0.62</td>\n      <td>14.26</td>\n      <td>1.3</td>\n      <td>-3.1</td>\n      <td>-1.6</td>\n      <td>-4.4</td>\n      <td>-2.9</td>\n      <td>1.5</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>5.0</td>\n      <td>3.6</td>\n      <td>1.4</td>\n      <td>0.2</td>\n      <td>1.6</td>\n      <td>6.4</td>\n      <td>5.0</td>\n      <td>5.2</td>\n      <td>3.8</td>\n      <td>8.6</td>\n      <td>...</td>\n      <td>5.04</td>\n      <td>1.00</td>\n      
<td>0.72</td>\n      <td>18.00</td>\n      <td>1.2</td>\n      <td>-3.6</td>\n      <td>-2.2</td>\n      <td>-4.8</td>\n      <td>-3.4</td>\n      <td>1.4</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>145</th>\n      <td>6.7</td>\n      <td>3.0</td>\n      <td>5.2</td>\n      <td>2.3</td>\n      <td>7.5</td>\n      <td>11.9</td>\n      <td>8.2</td>\n      <td>9.0</td>\n      <td>5.3</td>\n      <td>9.7</td>\n      <td>...</td>\n      <td>15.60</td>\n      <td>15.41</td>\n      <td>6.90</td>\n      <td>20.10</td>\n      <td>2.9</td>\n      <td>-1.5</td>\n      <td>2.2</td>\n      <td>-4.4</td>\n      <td>-0.7</td>\n      <td>3.7</td>\n    </tr>\n    <tr>\n      <th>146</th>\n      <td>6.3</td>\n      <td>2.5</td>\n      <td>5.0</td>\n      <td>1.9</td>\n      <td>6.9</td>\n      <td>11.3</td>\n      <td>7.5</td>\n      <td>8.2</td>\n      <td>4.4</td>\n      <td>8.8</td>\n      <td>...</td>\n      <td>12.50</td>\n      <td>11.97</td>\n      <td>4.75</td>\n      <td>15.75</td>\n      <td>3.1</td>\n      <td>-1.3</td>\n      <td>2.5</td>\n      <td>-4.4</td>\n      <td>-0.6</td>\n      <td>3.8</td>\n    </tr>\n    <tr>\n      <th>147</th>\n      <td>6.5</td>\n      <td>3.0</td>\n      <td>5.2</td>\n      <td>2.0</td>\n      <td>7.2</td>\n      <td>11.7</td>\n      <td>8.2</td>\n      <td>8.5</td>\n      <td>5.0</td>\n      <td>9.5</td>\n      <td>...</td>\n      <td>15.60</td>\n      <td>13.00</td>\n      <td>6.00</td>\n      <td>19.50</td>\n      <td>3.2</td>\n      <td>-1.3</td>\n      <td>2.2</td>\n      <td>-4.5</td>\n   
   <td>-1.0</td>\n      <td>3.5</td>\n    </tr>\n    <tr>\n      <th>148</th>\n      <td>6.2</td>\n      <td>3.4</td>\n      <td>5.4</td>\n      <td>2.3</td>\n      <td>7.7</td>\n      <td>11.6</td>\n      <td>8.8</td>\n      <td>8.5</td>\n      <td>5.7</td>\n      <td>9.6</td>\n      <td>...</td>\n      <td>18.36</td>\n      <td>14.26</td>\n      <td>7.82</td>\n      <td>21.08</td>\n      <td>3.1</td>\n      <td>-0.8</td>\n      <td>2.0</td>\n      <td>-3.9</td>\n      <td>-1.1</td>\n      <td>2.8</td>\n    </tr>\n    <tr>\n      <th>149</th>\n      <td>5.9</td>\n      <td>3.0</td>\n      <td>5.1</td>\n      <td>1.8</td>\n      <td>6.9</td>\n      <td>11.0</td>\n      <td>8.1</td>\n      <td>7.7</td>\n      <td>4.8</td>\n      <td>8.9</td>\n      <td>...</td>\n      <td>15.30</td>\n      <td>10.62</td>\n      <td>5.40</td>\n      <td>17.70</td>\n      <td>3.3</td>\n      <td>-0.8</td>\n      <td>2.1</td>\n      <td>-4.1</td>\n      <td>-1.2</td>\n      <td>2.9</td>\n    </tr>\n  </tbody>\n</table>\n<p>150 rows × 34 columns</p>\n</div>"
     },
     "metadata": {},
     "execution_count": 16
    }
   ],
   "source": [
    "# Handle abnormal values: map any +inf / -inf entries in the feature matrix to NaN.\n",
    "# DataFrame.replace returns a new frame rather than modifying the caller, so the\n",
    "# result has to be assigned back; otherwise the replacement is silently discarded.\n",
    "feature_matrix = feature_matrix.replace([np.inf, -np.inf], np.nan)\n",
    "feature_matrix"
   ]
  }
 ]
}